In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import calendar 
from IPython.display import HTML
import plotly.express as px
In [2]:
data = pd.read_csv("C:\\Users\\TECH\\Desktop\\Data Science\\Movies Streams project\\moviestreams.csv" )
data
Out[2]:
Unnamed: 0 ID Title Year Age IMDb Rotten Tomatoes Netflix Hulu Prime Video Disney+ Type Directors Genres Country Language Runtime
0 0 1 Inception 2010 13+ 8.8 87% 1 0 0 0 0 Christopher Nolan Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French 148.0
1 1 2 The Matrix 1999 18+ 8.7 87% 1 0 0 0 0 Lana Wachowski,Lilly Wachowski Action,Sci-Fi United States English 136.0
2 2 3 Avengers: Infinity War 2018 13+ 8.5 84% 1 0 0 0 0 Anthony Russo,Joe Russo Action,Adventure,Sci-Fi United States English 149.0
3 3 4 Back to the Future 1985 7+ 8.5 96% 1 0 0 0 0 Robert Zemeckis Adventure,Comedy,Sci-Fi United States English 116.0
4 4 5 The Good, the Bad and the Ugly 1966 18+ 8.8 97% 1 0 1 0 0 Sergio Leone Western Italy,Spain,West Germany Italian 161.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16739 16739 16740 The Ghosts of Buxley Hall 1980 NaN 6.2 NaN 0 0 0 1 0 Bruce Bilson Comedy,Family,Fantasy,Horror United States English 120.0
16740 16740 16741 The Poof Point 2001 7+ 4.7 NaN 0 0 0 1 0 Neal Israel Comedy,Family,Sci-Fi United States English 90.0
16741 16741 16742 Sharks of Lost Island 2013 NaN 5.7 NaN 0 0 0 1 0 Neil Gelinas Documentary United States English NaN
16742 16742 16743 Man Among Cheetahs 2017 NaN 6.6 NaN 0 0 0 1 0 Richard Slater-Jones Documentary United States English NaN
16743 16743 16744 In Beaver Valley 1950 NaN NaN NaN 0 0 0 1 0 James Algar Documentary,Short,Family United States English 32.0

16744 rows × 17 columns

In [3]:
df=pd.DataFrame(data)
cols=df.columns.tolist()
cols
Out[3]:
['Unnamed: 0',
 'ID',
 'Title',
 'Year',
 'Age',
 'IMDb',
 'Rotten Tomatoes',
 'Netflix',
 'Hulu',
 'Prime Video',
 'Disney+',
 'Type',
 'Directors',
 'Genres',
 'Country',
 'Language',
 'Runtime']
In [4]:
data = data.drop(['Unnamed: 0','ID'], axis=1)
In [5]:
data
Out[5]:
Title Year Age IMDb Rotten Tomatoes Netflix Hulu Prime Video Disney+ Type Directors Genres Country Language Runtime
0 Inception 2010 13+ 8.8 87% 1 0 0 0 0 Christopher Nolan Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French 148.0
1 The Matrix 1999 18+ 8.7 87% 1 0 0 0 0 Lana Wachowski,Lilly Wachowski Action,Sci-Fi United States English 136.0
2 Avengers: Infinity War 2018 13+ 8.5 84% 1 0 0 0 0 Anthony Russo,Joe Russo Action,Adventure,Sci-Fi United States English 149.0
3 Back to the Future 1985 7+ 8.5 96% 1 0 0 0 0 Robert Zemeckis Adventure,Comedy,Sci-Fi United States English 116.0
4 The Good, the Bad and the Ugly 1966 18+ 8.8 97% 1 0 1 0 0 Sergio Leone Western Italy,Spain,West Germany Italian 161.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16739 The Ghosts of Buxley Hall 1980 NaN 6.2 NaN 0 0 0 1 0 Bruce Bilson Comedy,Family,Fantasy,Horror United States English 120.0
16740 The Poof Point 2001 7+ 4.7 NaN 0 0 0 1 0 Neal Israel Comedy,Family,Sci-Fi United States English 90.0
16741 Sharks of Lost Island 2013 NaN 5.7 NaN 0 0 0 1 0 Neil Gelinas Documentary United States English NaN
16742 Man Among Cheetahs 2017 NaN 6.6 NaN 0 0 0 1 0 Richard Slater-Jones Documentary United States English NaN
16743 In Beaver Valley 1950 NaN NaN NaN 0 0 0 1 0 James Algar Documentary,Short,Family United States English 32.0

16744 rows × 15 columns

In [6]:
cols
Out[6]:
['Unnamed: 0',
 'ID',
 'Title',
 'Year',
 'Age',
 'IMDb',
 'Rotten Tomatoes',
 'Netflix',
 'Hulu',
 'Prime Video',
 'Disney+',
 'Type',
 'Directors',
 'Genres',
 'Country',
 'Language',
 'Runtime']
In [7]:
data.isna().sum()
Out[7]:
Title                  0
Year                   0
Age                 9390
IMDb                 571
Rotten Tomatoes    11586
Netflix                0
Hulu                   0
Prime Video            0
Disney+                0
Type                   0
Directors            726
Genres               275
Country              435
Language             599
Runtime              592
dtype: int64
In [8]:
data['Age']
Out[8]:
0        13+
1        18+
2        13+
3         7+
4        18+
        ... 
16739    NaN
16740     7+
16741    NaN
16742    NaN
16743    NaN
Name: Age, Length: 16744, dtype: object
In [9]:
ageMap={'13+':13,'18+':18,'7+':7,'16+':16,'ALL':0}
data['Age Copy']=data['Age'].map(ageMap)
data
Out[9]:
Title Year Age IMDb Rotten Tomatoes Netflix Hulu Prime Video Disney+ Type Directors Genres Country Language Runtime Age Copy
0 Inception 2010 13+ 8.8 87% 1 0 0 0 0 Christopher Nolan Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French 148.0 13.0
1 The Matrix 1999 18+ 8.7 87% 1 0 0 0 0 Lana Wachowski,Lilly Wachowski Action,Sci-Fi United States English 136.0 18.0
2 Avengers: Infinity War 2018 13+ 8.5 84% 1 0 0 0 0 Anthony Russo,Joe Russo Action,Adventure,Sci-Fi United States English 149.0 13.0
3 Back to the Future 1985 7+ 8.5 96% 1 0 0 0 0 Robert Zemeckis Adventure,Comedy,Sci-Fi United States English 116.0 7.0
4 The Good, the Bad and the Ugly 1966 18+ 8.8 97% 1 0 1 0 0 Sergio Leone Western Italy,Spain,West Germany Italian 161.0 18.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16739 The Ghosts of Buxley Hall 1980 NaN 6.2 NaN 0 0 0 1 0 Bruce Bilson Comedy,Family,Fantasy,Horror United States English 120.0 NaN
16740 The Poof Point 2001 7+ 4.7 NaN 0 0 0 1 0 Neal Israel Comedy,Family,Sci-Fi United States English 90.0 7.0
16741 Sharks of Lost Island 2013 NaN 5.7 NaN 0 0 0 1 0 Neil Gelinas Documentary United States English NaN NaN
16742 Man Among Cheetahs 2017 NaN 6.6 NaN 0 0 0 1 0 Richard Slater-Jones Documentary United States English NaN NaN
16743 In Beaver Valley 1950 NaN NaN NaN 0 0 0 1 0 James Algar Documentary,Short,Family United States English 32.0 NaN

16744 rows × 16 columns

In [10]:
type(data['Rotten Tomatoes'])
Out[10]:
pandas.core.series.Series
In [13]:
pip install plotly pandas
Requirement already satisfied: plotly in c:\users\tech\anaconda3\lib\site-packages (5.9.0)
Requirement already satisfied: pandas in c:\users\tech\anaconda3\lib\site-packages (1.4.4)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\tech\anaconda3\lib\site-packages (from plotly) (8.0.1)
Requirement already satisfied: pytz>=2020.1 in c:\users\tech\anaconda3\lib\site-packages (from pandas) (2022.1)
Requirement already satisfied: python-dateutil>=2.8.1 in c:\users\tech\anaconda3\lib\site-packages (from pandas) (2.8.2)
Requirement already satisfied: numpy>=1.18.5 in c:\users\tech\anaconda3\lib\site-packages (from pandas) (1.21.5)
Requirement already satisfied: six>=1.5 in c:\users\tech\anaconda3\lib\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [14]:
Language=data['Language'].value_counts().head(10)
plt.figure(figsize=(10,8))
sns.barplot(x=Language.index,y=Language.values)
plt.title('Top 10 languages in streaming services')
Out[14]:
Text(0.5, 1.0, 'Top 10 languages in streaming services')
In [15]:
#its just like KayanRH dashboard 
from IPython.display import HTML
import plotly.express as px

# create a Plotly pie chart
figLanguages = px.pie(data, values=Language.values,
             names=Language.index,
             title='Top 10 languages in streaming services',
            height=600)


# convert the figure to an HTML object
html_fig = HTML(figLanguages.to_html())
figLanguages.show()
In [16]:
fig2=px.bar(data,x=Language.index,y=Language.values,title='Top 10 languages in streaming services')
fig2.show()
In [17]:
from IPython.display import HTML
import plotly.express as px

Age = data['Age'].value_counts().head(10)

# create a Plotly pie chart
figAge = px.pie(values=Age.values,
                names=Age.index,
                title='Number of movies in specific age group in all services',
                height=600)

# convert the figure to an HTML object
html_fig = HTML(figAge.to_html())

# display the chart
figAge.show()
In [18]:
# to customize the hover text of a Plotly pie chart,
Age = data['Age'].value_counts().head(10)
Age_df = pd.DataFrame({'age': Age.index, 'count': Age.values})
figAge = px.pie(Age_df, values='count', names='age', title='Number of movies in specific age group in all services')
figAge.show()
In [19]:
# ALL SERVICES
from IPython.display import HTML
import plotly.express as px

Age = data['Age'].value_counts().head(10)

# create a Plotly pie chart
figAge = px.bar(x=Age.index,
                y=Age.values,
                title='Number of movies in specific age group in all services',
                height=600,
                text=Age)

# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

# display the chart
figAge.show()
In [20]:
#IN NETFLIX
from IPython.display import HTML
import plotly.express as px
Netflix=data[data['Netflix']==1]
# Age = data['Age'].value_counts().head(10)

# create a Plotly pie chart
figAge = px.bar(Netflix['Age'],
                x=Netflix['Age'].value_counts().index,
                y=Netflix['Age'].value_counts(),
                title='Number of movies in specific age group in NETFLIX',
                height=600,
                text=Netflix['Age'].value_counts(),
                color_discrete_sequence=['#E50914'])

# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

# # update the layout with axis titles
# fig.update_layout(
#     xaxis=dict(title='Age', categoryorder='category ascending'),
#     yaxis=dict(title='Number of movies'),
# )

# display the chart
figAge.show()
In [21]:
#IN Amazon Prime Video
from IPython.display import HTML
import plotly.express as px
PrimeVideo=data[data['Prime Video']==1]


# create a Plotly pie chart
figAge = px.bar(PrimeVideo['Age'],
                x=PrimeVideo['Age'].value_counts().index,
                y=PrimeVideo['Age'].value_counts(),
                title='Number of movies in specific age group in Amazon Prime Video',
                height=600,
                text=PrimeVideo['Age'].value_counts(),
                color_discrete_sequence=['#FF9900'])

figAge.update_layout(xaxis_title="Age Group", yaxis_title="Number of Movies") # set x and y axis labels

# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

figAge.show()
In [22]:
#IN Disney+
from IPython.display import HTML
import plotly.express as px
Disney=data[data['Disney+']==1]


# create a Plotly pie chart
figAge = px.bar(Disney['Age'],
                x=Disney['Age'].value_counts().index,
                y=Disney['Age'].value_counts(),
                title='Number of movies in specific age group in Disney+',
                height=600,
                text=Disney['Age'].value_counts(),
                color_discrete_sequence=['#153866']) # set the color of the bars to red

figAge.update_layout(xaxis_title="Age Group", yaxis_title="Number of Movies") # set x and y axis labels


# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

figAge.show()
In [23]:
#IN Hulu
from IPython.display import HTML
import plotly.express as px
Hulu=data[data['Hulu']==1]


# create a Plotly pie chart
figAge = px.bar(Hulu['Age'],
                x=Hulu['Age'].value_counts().index,
                y=Hulu['Age'].value_counts(),
                title='Number of movies in specific age group in Hulu',
                height=600,
                text=Hulu['Age'].value_counts(),
                color_discrete_sequence=['#66aa33']) # set the color of the bars to red

figAge.update_layout(xaxis_title="Age Group", yaxis_title="Number of Movies") # set x and y axis labels


# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

figAge.show()
In [24]:
from IPython.display import HTML
import plotly.express as px
Hulu=data[data['Hulu']==1]


# create a Plotly pie chart
figAge = px.bar(data,
                x=data['New Rotten Tomatoes'].value_counts().sort_index(),
                y=data['New Rotten Tomatoes'].value_counts(),
                title='Number of movies in specific age group in Rotten Tomatoes Ratings',
                height=600,
#                 text=data['New Rotten Tomatoes'].value_counts(),
                color_discrete_sequence=['blue']) # set the color of the bars to red

figAge.update_layout(xaxis_title="Age Group", yaxis_title="Number of Movies") # set x and y axis labels


# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

figAge.show()
In [ ]:
data
In [25]:
data['Hulu'].value_counts()
Out[25]:
0    15841
1      903
Name: Hulu, dtype: int64
In [26]:
data['New Rotten Tomatoes'].value_counts().sort_index()
Out[26]:
10      20
100    407
11      27
12      11
13      34
      ... 
95      72
96      73
97      61
98      41
99      26
Name: New Rotten Tomatoes, Length: 99, dtype: int64
In [27]:
data['New Rotten Tomatoes'].value_counts()
Out[27]:
100    407
80     162
50     136
83     131
67     126
      ... 
28      10
7       10
4        9
3        4
2        4
Name: New Rotten Tomatoes, Length: 99, dtype: int64
In [28]:
rt_scores=pd.DataFrame({'Streaming services':['Netflix','Prime video','Hulu','Disney+'],
                                             'Rotten Tomatos Scores':[Netflix['Rotten Tomatoes'].value_counts()[0],
                                                                     PrimeVideo['Rotten Tomatoes'].value_counts()[0],
                                                                     Hulu['Rotten Tomatoes'].value_counts()[0],
                                                                     Disney['Rotten Tomatoes'].value_counts()[0],
                                                                     ] })
rt_scores
Out[28]:
Streaming services Rotten Tomatos Scores
0 Netflix 130
1 Prime video 257
2 Hulu 18
3 Disney+ 19
In [29]:
sort_rt_scores=rt_scores.sort_values(ascending=False, by = 'Rotten Tomatos Scores')

sort_rt_scores
Out[29]:
Streaming services Rotten Tomatos Scores
1 Prime video 257
0 Netflix 130
3 Disney+ 19
2 Hulu 18
In [30]:
from IPython.display import HTML
import plotly.express as px
Hulu=data[data['Hulu']==1]


# create a Plotly pie chart
figAge = px.bar(sort_rt_scores,
                x=sort_rt_scores['Streaming services'],
                y=sort_rt_scores['Rotten Tomatos Scores'],
                title='Rotten tomatos Rating for each services',
                text=sort_rt_scores['Rotten Tomatos Scores'],
                height=600,
#                 text=data['New Rotten Tomatoes'].value_counts(),
                color_discrete_sequence=['blue']) # set the color of the bars to red




# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

figAge.show()
In [31]:
from IPython.display import HTML
import plotly.express as px
IMDb=data[data['IMDb']==1]

figAge = px.bar(data,
                x=data['IMDb'].value_counts().index,
                y=data['IMDb'].value_counts(),
                title='IMDb Ratings',
                text=data['IMDb'].value_counts(),
                height=600,
#                 text=data['New Rotten Tomatoes'].value_counts(),
                color_discrete_sequence=['#f3ce13']) # set the color of the bars to red





# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

figAge.show()
In [32]:
RuntimeCount = pd.DataFrame(data['Runtime'].value_counts().sort_values(ascending=False)[:10].items(), columns=['Runtime', 'Count'])
RuntimeCount
Out[32]:
Runtime Count
0 90.0 971
1 95.0 489
2 92.0 434
3 93.0 422
4 85.0 408
... ... ...
152 19.0 8
153 32.0 8
154 9.0 8
155 7.0 8
156 10.0 8

157 rows × 2 columns

In [33]:
figAge = px.bar(RuntimeCount,
                x=RuntimeCount['Runtime'],
                y=RuntimeCount['Count'],
                title='Counts of Runtime of movies',
                text=RuntimeCount['Runtime'],
                height=600,
                color_discrete_sequence=['#f3ce13']) # set the color of the bars





# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

figAge.show()
In [34]:
# Group the data by Runtime and count the number of occurrences
RuntimeCount = data.groupby('Runtime').size().reset_index(name='Count')

# Create the bar chart
figAge = px.bar(RuntimeCount,
                x='Runtime',
                y='Count',
                title='Counts of Runtime of movies',
                text='Count',
                height=600,
                color_discrete_sequence=['#f3ce13']) # set the color of the bars

# convert the figure to an HTML object
figAge.update_traces(texttemplate='%{text:2s}',textposition='outside')
HTML(figAge.to_html())

figAge.show()
In [ ]:
#there are an example from 1.13 till end